#Code for CSD in census
ct <-  "35535"
cm  <-  "24462"
cv  <-  "59933"

#Code for central municipality in census
cmt <-  "3520005"
cmv <- "5915022"
cmm <- "2466023"

#Coordinates of city hall
cht  <-  c(-79.5257363, 43.6148863)
chv  <-  c(-123.2309412, 49.2219987)
chm  <-  c(-73.7171664, 45.4726094)

#Critical distance for AirBnb
bdtv <- 10000
bdm <-  12000

#Files from inside airbnb
f  <-  list.files('insideairbnb/')

## Identifying the vectors for visible Minority status
parent_vector <- "v_CA16_3954"
minorities <- list_census_vectors("CA16") %>% 
  filter(vector == "v_CA16_3954") %>% 
  child_census_vectors(leaves_only = TRUE) %>% 
  pull(vector)

minority_vectors <- c(parent_vector, minorities)
lab  <-  as.data.frame (list_census_vectors('CA16') %>% 
                       filter(vector %in% minorities) %>% 
                       select(vector, label))
montrealAirbnb <- read.csv(paste0('insideairbnb/', f[1]))
torontoAirbnb <- read.csv(paste0('insideairbnb/', f[2]))
vancouverAirbnb <- read.csv(paste0('insideairbnb/', f[3]))

df <-  rbind(montrealAirbnb, torontoAirbnb, vancouverAirbnb)
l <- levels(df$property_type)
lookup  <-  data.frame('type' = 1:length(l))
lookup$type <- as.factor(l)
lookup$property_group <- c(
  # [1] "Aparthotel"             "Apartment"              "Bed and breakfast"      "Boat"                   "Boutique hotel"         "Bungalow"               "Cabin"                 
  'hotel', 'home', 'hotel', 'other', 'hotel', 'home', 'other',
  #  [8] "Camper/RV"              "Campsite"               "Casa particular (Cuba)" "Cave"                   "Chalet"                 "Condominium"            "Cottage"               
  'other', 'other', 'home', 'other', 'home', 'home', 'home',
  # [15] "Farm stay"              "Guest suite"            "Guesthouse"             "Hostel"                 "Hotel"                  "House"                  "Houseboat"             
  'home', 'home', 'hotel', 'hotel', 'hotel', 'home', 'home',
  # [22] "Hut"                    "Loft"                   "Nature lodge"           "Other"                  "Serviced apartment"     "Tent"                   "Timeshare"             
  'other', 'home', 'other', 'other', 'hotel', 'other', 'other',
  # [29] "Tiny house"             "Townhouse"              "Villa"                  "Barn"                   "Castle"                 "Dorm"                   "Earth house"           
  'home', 'home', 'home', 'home', 'home', 'hotel', 'other',
  # [36] "In-law"                 "Parking Space"          "Treehouse"              "Resort"            
  'home', 'other', 'other', 'hotel'
)


dfhu <- left_join(df, lookup, by = c("property_type" = "type")) %>% 
  filter(property_group == 'home' & 
           as.character(host_neighbourhood) == as.character(neighbourhood) & 
           !duplicated(host_id))
## Warning: Column `property_type`/`type` joining factors with different
## levels, coercing to character vector
#df <-  data.frame(df,lookup[match(df$property_type, lookup$type),] )
#dfh  <-  subset(df, property_group == 'home' & as.character(df$host_neighbourhood) == as.character(df$neighbourhood))
#dfhu  <-  dfh[!duplicated(dfh$host_id),]
#dim(dfh)[1] - dim(df)[1]
#dim(dfhu)[1] - dim(dfh)[1]
dim(dfhu)[1] - dim(df)[1]
## [1] -19182
dfhu$year  <-  as.numeric(substr(dfhu$last_review, 1, 4))
dfhun <-  subset(dfhu, year >= 2017)
dim(dfhun)[1] - dim(dfhu)[1]
## [1] -7415
dfhun$numPrice <- as.numeric(gsub("[$]",'',dfhun$price))
## Warning: NAs introduits lors de la conversion automatique
summary(dfhun$room_type)
## Entire home/apt    Private room     Shared room 
##           11975            4497             142
final  <-  subset(dfhun, room_type != "Shared room")

final$rooms  <-  ifelse(final$bedrooms == 0, 1, final$bedrooms)
final$priceperroom  <-  as.numeric(ifelse(final$room_type == T,  final$numPrice,  final$numPrice / final$rooms))
dim(final) / dim(df)
## [1] 0.3811992 1.0520833

Census data of all Canadian Metros

ggplot(bind_rows(cma_seg %>% 
                   filter(Population > 100000) %>% 
                   clean_names2 %>% 
                   top_n(10, -H), 
                 cma_seg %>% 
                   filter(Population > 100000) %>% 
                   clean_names2 %>% 
                   top_n(10, H)), 
       aes(y = H, x = reorder(`Region Name`, H), size = Population)) + 
  #geom_bar(stat = "identity") +
  geom_point(colour = "#7e008c") + 
  coord_flip() + 
  theme_ipsum() +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        legend.position = "none") +
  labs(y = "Segregation entropy index", x = "", 
       #title = "The most and the least segregated large cities in Canada",
       caption = "Segregation index of visible minorities\n@dshkol | Data: Statistics Canada, Census 2016")

ggplot(cma_seg%>% filter(Population > 100000) %>% clean_names2 %>% 
         mutate(big_cma = ifelse(`CMA Population` > 1000000, `CMA Name`,"Other")),
       aes(y= H, x = Ei, size = Population^1.5, colour = big_cma)) +
  #geom_label_repel(aes(label = `Region Name`)) +
  geom_text_repel(aes(label = `Region Name`)) +
  scale_size_continuous(guide = FALSE) + 
  scale_colour_ipsum("", guide = FALSE) +
  theme_ipsum() +
  theme(panel.grid = element_blank(),
        axis.text = element_blank()) + 
  labs(x = "More diverse \u2192", y = "More segregated \u2192",
       caption = "Entropy index based calculations of diversity and segregation\nof visible minority groups in cities with population over 100,000\n@dshkol | Data: Statistics Canada, Census 2016")

Start city specific analysis

city <-  "toronto"

if(city == "toronto"){
  citynames  <-  unique(torontoAirbnb$city)
  censuscode  <-  ct
  cityhall  <-  cht
  censusCodeMuni  <- cmt 
  breakDist <-  bdtv
}
if(city == "vancouver"){
  citynames  <-  unique(vancouverAirbnb$city)
  censuscode  <-  cv
  cityhall  <-  chv
  censusCodeMuni  <-  cmv 
  breakDist  <-  bdtv
}
if(city == "montreal"){
  citynames  <-  unique(montrealAirbnb$city)
  censuscode  <-  cm
  cityhall  <-  chm
  censusCodeMuni <-  cmm
  breakDist  <-  bdm
}

city_data <-  subset(final, city %in% citynames)
tractTable <- diversity_index(censuscode)


pal <- colorQuantile(
  palette =  'Blues',
  domain = city_data$priceperroom,
  n = 10
)
pal2 <- colorQuantile(
  palette =  'Reds',
  domain = tractTable$Ei,
  n = 10)

map <- leaflet() %>% addProviderTiles("CartoDB.Positron") %>%
  addPolygons(
    data = csd.csd.geo,
    color = 'black',
    fill = F,
    weight = 0.7,
    opacity = 0.9
  ) %>% addPolygons(
    data = tractTable,
    color =  ~ pal2(Ei),
    fill = ~ pal2(Ei),
    weight = 0.4
  ) %>%
  #addCircleMarkers(
  #   data = city_data,
  #       radius = ~ sqrt(4 * rooms),
  #   lat = ~ latitude,
  #   fillColor = ~ pal(priceperroom),
  #   color = 'black',
  #   stroke = T,
  #   fillOpacity = 0.5,
  #  weight = 0.1,
  #   layerId = ~ id,
  #   lng = ~ longitude
# ) %>% 
# addLegend(pal = pal, position = 'topleft', values = city_data$priceperroom)%>% 
addLegend(pal = pal2, position = 'topleft', values = tractTable$Ei)

map
long.ct.tidy <- long.ct %>% 
  select(-White) %>% 
  tidyr::gather(Group, Count, `South Asian`:Multiple) %>% 
  mutate(Proportion = Count/Total)

ggplot(long.ct.tidy) + geom_sf(aes(fill = Proportion^(1/2), colour = Proportion^(1/2))) + 
  scale_fill_viridis_c(option = 3, guide = FALSE) + 
  scale_colour_viridis_c(option = 3, guide = FALSE) + 
  theme_void() + 
  coord_sf(datum = NA) +
  facet_wrap(~Group, ncol = 4)  +
  labs(caption = "Visible minority groups by square-root proportion of Census Tract population\n@dshkol | Data: Statistics Canada, Census 2016")

pal <- colorQuantile(
  palette =  'Blues',
  domain = city_data$priceperroom,
  n = 10
)

map <- leaflet() %>% addProviderTiles("CartoDB.Positron") %>%
  addPolygons(
    data = csd.csd.geo,
    color = 'black',
    fill = F,
    weight = 0.7,
    opacity = 0.9
  ) %>% addPolygons(
    data = csd.geo,
    color = 'grey',
    fill = F,
    weight = 0.4
  ) %>%
  addCircleMarkers(
    data = city_data,
    radius = ~ sqrt(4 * rooms),
    lat = ~ latitude,
    fillColor = ~ pal(priceperroom),
    color = 'black',
    stroke = T,
    fillOpacity = 0.5,
    weight = 0.1,
    layerId = ~ id,
    lng = ~ longitude
  ) %>% 
  addLegend(pal = pal, position = 'topleft', values = city_data$priceperroom)
## Warning in RColorBrewer::brewer.pal(max(3, n), palette): n too large, allowed maximum for palette Blues is 9
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(max(3, n), palette): n too large, allowed maximum for palette Blues is 9
## Returning the palette you asked for with that many colors
map
#############aggregate airbnb into city tracts.
city_tr  <-  csd.geo[csd.geo$CSD_UID == censusCodeMuni,]
#city_tr$vacancy = (city_tr$Dwellings - city_tr$Households) / city_tr$Dwellings
#save(toronto_tr, vancouver_tr, file="cityTractsWithPopAndVacancy.RData")


rbnb_pts <- city_data 
coordinates(rbnb_pts) <- ~longitude+latitude
city_tracts <- as_Spatial(city_tr)
rbnb_pts@proj4string <-CRS("+proj=longlat +datum=WGS84")
city_tracts@proj4string <-CRS("+proj=longlat +datum=WGS84")
plot(city_tracts, border = "grey")
points(rbnb_pts, col = "red", cex = 0.2, pch = 16)

#class(city_tracts)
#class(rbnb_pts)

PTS <- as(rbnb_pts, "sf")
POLY <- as(city_tracts, "sf")
idata <- st_intersection(PTS, POLY)
## although coordinates are longitude/latitude, st_intersection assumes that they are planar
## Warning: attribute variables are assumed to be spatially constant
## throughout all geometries
#head(idata)
idata$distribPrice <- cut2(idata$priceperroom, g = 10)
intervals <- levels(unique(idata$distribPrice))

rbnbPerTract <- as.data.frame(idata %>%
                                #group_by(CSD_UID) %>%
                                count(GeoUID, sort = TRUE) %>%
                                select(GeoUID, n))[,1:2]

rbnbDistributionPerTract <- as.data.frame(idata %>%
                                            #group_by(CSD_UID) %>%
                                            count(GeoUID, distribPrice) %>%
                                            select(GeoUID, distribPrice, n))[,1:3]


tractDistrib <- dcast(rbnbDistributionPerTract, GeoUID ~ distribPrice)
## Using n as value column: use value.var to override.
tractDistrib[is.na(tractDistrib)] <- 0
tractDistrib <- tractDistrib[,c("GeoUID", intervals)]
colnames(tractDistrib)[2:11] <- paste0("G", 1:10)

lookupDistrib <-  data.frame('name' = paste0("G", 1:10), 'interval' = intervals)

listingCounts <-  as.data.frame(rbnbPerTract[,1:2])
city_tracts@data <- data.frame(city_tracts@data,
                                  listingCounts[match(city_tracts@data$GeoUID,
                                                      listingCounts$GeoUID),])
city_tracts@data <- data.frame(city_tracts@data,
                                  tractDistrib[match(city_tracts@data$GeoUID,
                                                     tractDistrib$GeoUID),])
city_tracts@data <- data.frame(city_tracts@data,
                                  tractTable[match(city_tracts@data$GeoUID,
                                                   tractTable$GeoUID),])
Mino <-  as.data.frame(cma.ct[,c("GeoUID", minorities)])

city_tracts@data <- data.frame(city_tracts@data,
                                  Mino[match(city_tracts@data$GeoUID,
                                             Mino$GeoUID),])

city_tracts@data$ListingPerCapita <-  city_tracts@data$n / city_tracts@data$Population
city_tracts@data <- city_tracts@data %>% 
  mutate_at(minorities, funs(Share =./ Population) )

#summary(city_tracts@data)
centroidsCity <-  gCentroid(city_tracts,byid=TRUE)
#plot(centroidsCity, add = T, pch = 16, cex = 0.6)

listMinoShares  <-  paste0(minorities, "_Share")

EstimatorAirbnbPresence <-  cbind(city_tracts@data[,c("GeoUID", "ListingPerCapita", "Ei", 
                                                       listMinoShares)],
                                coordinates(centroidsCity))

#head(EstimatorAirbnbPresence)

DistCityHall <- distm(EstimatorAirbnbPresence[,c('x','y')], 
                      cityhall, fun=distVincentyEllipsoid)
EstimatorAirbnbPresence <-  cbind(EstimatorAirbnbPresence, DistCityHall)
hist(EstimatorAirbnbPresence$DistCityHall)

#head(EstimatorAirbnbPresence)

ggplot(EstimatorAirbnbPresence, aes(x = DistCityHall, y = ListingPerCapita)) +
  geom_point() + geom_smooth() + scale_x_log10() + scale_y_log10() +
  geom_vline(xintercept = breakDist, col = "orange", cex = 1)

# mean value of listing per capita of tracts between 0 & 10 km: 
central <- EstimatorAirbnbPresence %>%
  filter(DistCityHall <= breakDist, !is.na(ListingPerCapita)) %>%
  select(ListingPerCapita, DistCityHall, Ei, listMinoShares)

# regression listing per capita of tracts above 10 km: 
peripheral <- EstimatorAirbnbPresence %>%
  filter(DistCityHall > breakDist, !is.na(ListingPerCapita)) %>%
  select(ListingPerCapita, DistCityHall, Ei, listMinoShares)
for (sample in c("all", "central", "peripheral")){
  if (sample == "all") df <-  EstimatorAirbnbPresence
  if (sample == "central") df <-  central
  if (sample == "peripheral") df  <-  peripheral
  
  f <- paste0("scale(log(ListingPerCapita)) ~ scale(log(DistCityHall)) + ",
             paste0("scale(",listMinoShares[-1], ")", collapse = " + "))
  model <- lm(formula(f), data = df)
  print(paste0("obs = ", dim(df)[1]))
  print(summary(model))
  
  coeffs <-  as.data.frame(summary(model)$coefficients)
  coeffs$label <- substr(rownames(coeffs), 7, 17)
  res  <-  data.frame(coeffs, lab[match(coeffs$label, lab$vector),])
  res[,c("label", "vector")] <- NULL
  assign(paste0("Results_", sample), res)
}
## [1] "obs = 572"
## 
## Call:
## lm(formula = formula(f), data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.54853 -0.51794 -0.01125  0.52926  2.30101 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              -0.04765    0.03507  -1.359 0.174806    
## scale(log(DistCityHall)) -0.07253    0.04704  -1.542 0.123714    
## scale(v_CA16_3960_Share) -0.26653    0.05038  -5.291 1.83e-07 ***
## scale(v_CA16_3963_Share) -0.15991    0.04678  -3.419 0.000681 ***
## scale(v_CA16_3966_Share) -0.20791    0.05538  -3.754 0.000194 ***
## scale(v_CA16_3969_Share) -0.26156    0.04088  -6.398 3.63e-10 ***
## scale(v_CA16_3972_Share) -0.16263    0.05492  -2.962 0.003207 ** 
## scale(v_CA16_3975_Share)  0.01765    0.03999   0.442 0.659037    
## scale(v_CA16_3978_Share)  0.10754    0.05931   1.813 0.070389 .  
## scale(v_CA16_3981_Share) -0.16610    0.05065  -3.279 0.001113 ** 
## scale(v_CA16_3984_Share)  0.06116    0.04920   1.243 0.214358    
## scale(v_CA16_3987_Share)  0.17337    0.03916   4.427 1.17e-05 ***
## scale(v_CA16_3990_Share)  0.04046    0.04838   0.836 0.403315    
## scale(v_CA16_3993_Share)  0.10966    0.04433   2.474 0.013707 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7896 on 500 degrees of freedom
##   (58 observations deleted due to missingness)
## Multiple R-squared:  0.3912, Adjusted R-squared:  0.3753 
## F-statistic: 24.71 on 13 and 500 DF,  p-value: < 2.2e-16
## 
## [1] "obs = 129"
## 
## Call:
## lm(formula = formula(f), data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.47750 -0.51838  0.09242  0.44225  1.51489 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              -0.01227    0.06281  -0.195  0.84543    
## scale(log(DistCityHall))  0.26285    0.07838   3.354  0.00108 ** 
## scale(v_CA16_3960_Share) -0.05530    0.07929  -0.698  0.48689    
## scale(v_CA16_3963_Share)  0.44399    0.07910   5.613 1.42e-07 ***
## scale(v_CA16_3966_Share) -0.29703    0.09290  -3.197  0.00180 ** 
## scale(v_CA16_3969_Share) -0.04331    0.08340  -0.519  0.60457    
## scale(v_CA16_3972_Share) -0.12145    0.08968  -1.354  0.17832    
## scale(v_CA16_3975_Share)  0.19020    0.07179   2.650  0.00920 ** 
## scale(v_CA16_3978_Share) -0.04714    0.09046  -0.521  0.60326    
## scale(v_CA16_3981_Share) -0.16192    0.07353  -2.202  0.02967 *  
## scale(v_CA16_3984_Share) -0.13654    0.07289  -1.873  0.06361 .  
## scale(v_CA16_3987_Share)  0.09813    0.06953   1.411  0.16088    
## scale(v_CA16_3990_Share)  0.22111    0.07619   2.902  0.00445 ** 
## scale(v_CA16_3993_Share)  0.04013    0.07854   0.511  0.61042    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7105 on 114 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.5459, Adjusted R-squared:  0.4941 
## F-statistic: 10.54 on 13 and 114 DF,  p-value: 2.39e-14
## 
## [1] "obs = 386"
## 
## Call:
## lm(formula = formula(f), data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.55191 -0.43016 -0.00474  0.44182  2.07480 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               3.623e-16  3.495e-02   0.000 1.000000    
## scale(log(DistCityHall)) -5.083e-01  5.122e-02  -9.925  < 2e-16 ***
## scale(v_CA16_3960_Share) -1.422e-01  4.962e-02  -2.866 0.004398 ** 
## scale(v_CA16_3963_Share) -4.279e-02  4.479e-02  -0.955 0.340043    
## scale(v_CA16_3966_Share)  2.990e-03  5.644e-02   0.053 0.957782    
## scale(v_CA16_3969_Share) -2.048e-01  4.051e-02  -5.055 6.76e-07 ***
## scale(v_CA16_3972_Share) -2.893e-01  5.449e-02  -5.309 1.90e-07 ***
## scale(v_CA16_3975_Share) -6.089e-03  3.764e-02  -0.162 0.871593    
## scale(v_CA16_3978_Share) -1.614e-02  5.056e-02  -0.319 0.749748    
## scale(v_CA16_3981_Share) -1.773e-01  5.248e-02  -3.379 0.000805 ***
## scale(v_CA16_3984_Share)  1.069e-01  5.329e-02   2.006 0.045603 *  
## scale(v_CA16_3987_Share)  1.470e-01  4.048e-02   3.631 0.000322 ***
## scale(v_CA16_3990_Share) -1.055e-02  5.432e-02  -0.194 0.846094    
## scale(v_CA16_3993_Share)  1.332e-01  4.259e-02   3.127 0.001905 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6867 on 372 degrees of freedom
## Multiple R-squared:  0.5444, Adjusted R-squared:  0.5284 
## F-statistic: 34.19 on 13 and 372 DF,  p-value: < 2.2e-16
#str(Results_all)
reg_total  <-  cbind(Results_all,Results_central, Results_peripheral)
reg_total[,c("t.value", "Std..Error", "t.value.1", "Std..Error.1", "label.1.1", "t.value.2", "Std..Error.2", "label.1.2")] <- NULL
colnames(reg_total)  <-  c("est_all", "pval_all",  "minority",  "est_central", "pval_central", 
                        "est_peripheral", "pval_peripheral")
reg_total$variables  <-  rownames(reg_total)
reg_total$var  <-  ifelse(is.na(reg_total$minority), as.character(reg_total$variables), as.character(reg_total$minority))
pall <- ggplot(reg_total, aes(x = var)) + 
  geom_bar(aes(y = est_all, fill = ifelse(pval_all < 0.05, "pval < 0.05", "pval >= 0.05")), stat="identity") + 
  # scale_y_continuous(limits = c(-0.55, 0.55)) +
  coord_flip() + theme(legend.title=element_blank()) 
pcent <- ggplot(reg_total, aes(x = var)) + 
  geom_bar(aes(y = est_central, fill = ifelse(pval_central < 0.05, "pval < 0.05", "pval >= 0.05")), stat="identity") +  
  # scale_y_continuous(limits = c(-0.55, 0.55)) + 
  coord_flip() +   theme(legend.title=element_blank() ) 

pper <- ggplot(reg_total, aes(x = var)) + 
  geom_bar(aes(y = est_peripheral, fill = ifelse(pval_peripheral < 0.05, "pval < 0.05", "pval >= 0.05")), stat="identity") +
  #scale_y_continuous(limits = c(-0.55, 0.55)) + 
  coord_flip() +   theme(legend.title=element_blank()) 

grid.arrange(pall, pcent, pper)

EstimatorAirbnbPrice <- data.frame(idata, EstimatorAirbnbPresence[match(idata$GeoUID, 
                                                                        EstimatorAirbnbPresence$GeoUID),])
# head(EstimatorAirbnbPrice)
# hist(EstimatorAirbnbPrice$priceperroom)


EstimatorAirbnbPrice_all <- EstimatorAirbnbPrice %>%
  filter(!is.na(priceperroom), priceperroom > 0) 
# mean value of listing per capita of tracts between 0 & 10 km: 
central_P <- EstimatorAirbnbPrice_all %>%
  filter(DistCityHall <= breakDist) #%>%   select(ListingPerCapita, DistCityHall, Ei, listMinoShares)

peripheral_P <- EstimatorAirbnbPrice_all %>%
  filter(DistCityHall > breakDist) #%>%   select(ListingPerCapita, DistCityHall, Ei, listMinoShares)



for (sample in c("all", "central", "peripheral")){
  if (sample == "all") df <-  EstimatorAirbnbPrice_all
  if (sample == "central") df  <-  central_P
  if (sample == "peripheral") df  <-  peripheral_P
  
  f  <-  paste0("scale(priceperroom) ~ scale(log(DistCityHall)) + ",
             paste0("scale(",listMinoShares[-1], ")", collapse = " + "))
  model <- lm(formula(f), data = df)
  
  print(paste0("obs = ", dim(df)[1]))
  print(summary(model))
  
  coeffs  <-  as.data.frame(summary(model)$coefficients)
  coeffs$label <- substr(rownames(coeffs), 7, 17)
  res  <-  data.frame(coeffs, lab[match(coeffs$label, lab$vector),])
  res[,c("label", "vector")] <- NULL
  assign(paste0("Results_", sample, "Price"), res)
}
## [1] "obs = 6657"
## 
## Call:
## lm(formula = formula(f), data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1962 -0.6065 -0.1791  0.3766 10.5641 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               0.0001742  0.0117322   0.015  0.98815    
## scale(log(DistCityHall)) -0.0668709  0.0149279  -4.480 7.60e-06 ***
## scale(v_CA16_3960_Share) -0.0032345  0.0146358  -0.221  0.82510    
## scale(v_CA16_3963_Share) -0.0295906  0.0150768  -1.963  0.04973 *  
## scale(v_CA16_3966_Share) -0.0963053  0.0157190  -6.127 9.49e-10 ***
## scale(v_CA16_3969_Share) -0.1373439  0.0137331 -10.001  < 2e-16 ***
## scale(v_CA16_3972_Share) -0.0646569  0.0154912  -4.174 3.03e-05 ***
## scale(v_CA16_3975_Share)  0.2015608  0.0141764  14.218  < 2e-16 ***
## scale(v_CA16_3978_Share)  0.0195717  0.0152841   1.281  0.20040    
## scale(v_CA16_3981_Share) -0.0272682  0.0185182  -1.473  0.14093    
## scale(v_CA16_3984_Share) -0.0086998  0.0182519  -0.477  0.63363    
## scale(v_CA16_3987_Share) -0.0098371  0.0130038  -0.756  0.44939    
## scale(v_CA16_3990_Share) -0.0386677  0.0141334  -2.736  0.00624 ** 
## scale(v_CA16_3993_Share)  0.0860938  0.0135781   6.341 2.44e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9572 on 6642 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.08565,    Adjusted R-squared:  0.08386 
## F-statistic: 47.86 on 13 and 6642 DF,  p-value: < 2.2e-16
## 
## [1] "obs = 1865"
## 
## Call:
## lm(formula = formula(f), data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1669 -0.6003 -0.1782  0.3840  8.9505 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               0.0002938  0.0220161   0.013   0.9894    
## scale(log(DistCityHall))  0.1539406  0.0270725   5.686 1.51e-08 ***
## scale(v_CA16_3960_Share) -0.0202564  0.0327393  -0.619   0.5362    
## scale(v_CA16_3963_Share)  0.1265478  0.0294063   4.303 1.77e-05 ***
## scale(v_CA16_3966_Share) -0.0611201  0.0294178  -2.078   0.0379 *  
## scale(v_CA16_3969_Share)  0.1293594  0.0318532   4.061 5.09e-05 ***
## scale(v_CA16_3972_Share) -0.1237398  0.0289276  -4.278 1.99e-05 ***
## scale(v_CA16_3975_Share)  0.2395442  0.0310037   7.726 1.80e-14 ***
## scale(v_CA16_3978_Share) -0.0508944  0.0334406  -1.522   0.1282    
## scale(v_CA16_3981_Share) -0.0371391  0.0307432  -1.208   0.2272    
## scale(v_CA16_3984_Share) -0.0178721  0.0267618  -0.668   0.5043    
## scale(v_CA16_3987_Share)  0.0212608  0.0238212   0.893   0.3722    
## scale(v_CA16_3990_Share)  0.0150129  0.0305890   0.491   0.6236    
## scale(v_CA16_3993_Share)  0.0290583  0.0291923   0.995   0.3197    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9505 on 1850 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.1029, Adjusted R-squared:  0.09657 
## F-statistic: 16.32 on 13 and 1850 DF,  p-value: < 2.2e-16
## 
## [1] "obs = 4792"
## 
## Call:
## lm(formula = formula(f), data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.0444 -0.5511 -0.1503  0.3427  9.6695 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              -3.087e-15  1.316e-02   0.000  1.00000    
## scale(log(DistCityHall)) -3.644e-01  1.796e-02 -20.282  < 2e-16 ***
## scale(v_CA16_3960_Share)  6.234e-02  1.784e-02   3.494  0.00048 ***
## scale(v_CA16_3963_Share) -2.973e-02  1.638e-02  -1.815  0.06963 .  
## scale(v_CA16_3966_Share) -5.403e-02  1.975e-02  -2.735  0.00626 ** 
## scale(v_CA16_3969_Share) -8.783e-02  1.610e-02  -5.457 5.10e-08 ***
## scale(v_CA16_3972_Share) -1.186e-01  1.831e-02  -6.480 1.01e-10 ***
## scale(v_CA16_3975_Share)  1.143e-01  1.583e-02   7.219 6.08e-13 ***
## scale(v_CA16_3978_Share) -8.425e-03  1.696e-02  -0.497  0.61943    
## scale(v_CA16_3981_Share)  2.647e-02  2.146e-02   1.234  0.21741    
## scale(v_CA16_3984_Share)  3.419e-03  2.125e-02   0.161  0.87216    
## scale(v_CA16_3987_Share) -4.345e-02  1.475e-02  -2.945  0.00324 ** 
## scale(v_CA16_3990_Share) -6.594e-03  2.017e-02  -0.327  0.74379    
## scale(v_CA16_3993_Share)  6.013e-02  1.479e-02   4.067 4.84e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9111 on 4778 degrees of freedom
## Multiple R-squared:  0.1721, Adjusted R-squared:  0.1698 
## F-statistic: 76.39 on 13 and 4778 DF,  p-value: < 2.2e-16
reg_total2  <-  cbind(Results_allPrice,Results_centralPrice, Results_peripheralPrice)
reg_total2[,c("t.value", "Std..Error", "t.value.1", "Std..Error.1", "label.1.1", "t.value.2", "Std..Error.2", "label.1.2")] <- NULL
colnames(reg_total2) <-  c("est_all", "pval_all",  "minority",  "est_central", "pval_central", 
                         "est_peripheral", "pval_peripheral")
reg_total2$variables <-  rownames(reg_total2)
reg_total2$var  <-  ifelse(is.na(reg_total2$minority), as.character(reg_total2$variables), 
                        as.character(reg_total2$minority))
pall2 <- ggplot(reg_total2, aes(x = var)) + 
  geom_bar(aes(y = est_all, fill = ifelse(pval_all < 0.05, "pval < 0.05", "pval >= 0.05")), stat="identity") + 
  # scale_y_continuous(limits = c(-0.55, 0.55)) +
  coord_flip() + theme(legend.title=element_blank()) 
pcent2 <- ggplot(reg_total2, aes(x = var)) + 
  geom_bar(aes(y = est_central, fill = ifelse(pval_central < 0.05, "pval < 0.05", "pval >= 0.05")), stat="identity") +  
  # scale_y_continuous(limits = c(-0.55, 0.55)) + 
  coord_flip() + theme(legend.title=element_blank() ) 
pper2 <- ggplot(reg_total2, aes(x = var)) + 
  geom_bar(aes(y = est_peripheral, fill = ifelse(pval_peripheral < 0.05, "pval < 0.05", "pval >= 0.05")), stat="identity") +
  #scale_y_continuous(limits = c(-0.55, 0.55)) +
  coord_flip() + 
  theme(legend.title=element_blank()) 

grid.arrange(pall, pall2, 
             pcent, pcent2, 
             pper, pper2, 
             ncol = 2)

ReardonSegregationAirbnbPrice <- segIndex10(tabOfSpatialUnits = city_tracts@data, distributionColNames = paste0("G", 1:10))
EntropySegregationMinorities  <-  cma_seg[cma_seg$CSD_UID == censusCodeMuni,"H"]

ReardonSegregationAirbnbPrice
## [1] 0.2631066
EntropySegregationMinorities
## # A tibble: 1 x 1
##       H
##   <dbl>
## 1 0.180